LIBRARIES
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(mapdata)
library(viridis)
## Loading required package: viridisLite
library(wesanderson)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Map of World Confirmed Cases
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>%
rename(Long = "Long_")
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_character(),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed/1000)) +
borders("world", colour = NA, fill = "grey70") +
theme_bw() +
geom_point(shape = 21, color='pink', fill='pink', alpha = 0.5) +
labs(title = 'World COVID-19 Confirmed cases',x = '', y = '',
size="Cases (x1000))") +
theme(legend.position = "right") +
coord_fixed(ratio=1.5)
## Warning: Removed 54 rows containing missing values (geom_point).

Map of Continential US Confirmed Cases
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-05-2020.csv")) %>%
rename(Long = "Long_") %>%
filter(Country_Region == "US") %>%
filter (!Province_State %in% c("Alaska","Hawaii", "American Samoa", "Puerto Rico","Northern Mariana Islands", "Virgin Islands", "Recovered", "Guam", "Grand Princess", "District of Columbia", "Diamond Princess")) %>%
filter(Lat > 0)
## Parsed with column specification:
## cols(
## FIPS = col_character(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed/1000)) +
borders("state", colour = "black", fill = "grey90") +
theme_bw() +
geom_point(shape = 21, color='orange', fill='orange', alpha = 0.5) +
labs(title = 'Confirmed Cases of COVID-19 in the Continential US', x = '', y = '',
size ="Cases (x1000))") +
theme(legend.position = "right") +
coord_fixed(ratio=1.5)

Anisa Dhana Example
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-05-2020.csv")) %>%
rename(Long = "Long_") %>%
filter(Country_Region == "US") %>%
filter (!Province_State %in% c("Alaska","Hawaii", "American Samoa",
"Puerto Rico","Northern Mariana Islands",
"Virgin Islands", "Recovered", "Guam", "Grand Princess",
"District of Columbia", "Diamond Princess")) %>%
filter(Lat > 0)
## Parsed with column specification:
## cols(
## FIPS = col_character(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed/1000)) +
borders("state", colour = "black", fill = "grey90") +
theme_bw() +
geom_point(shape = 21, color='purple', fill='purple', alpha = 0.5) +
labs(title = 'COVID-19 Confirmed Cases in the US', x = '', y = '',
size="Cases (x1000))") +
theme(legend.position = "right") +
coord_fixed(ratio=1.5)

mybreaks <- c(1, 100, 1000, 10000, 10000)
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed)) +
borders("state", colour = "white", fill = "grey85") +
geom_point(aes(x=Long, y=Lat, size=Confirmed, color=Confirmed),stroke=F, alpha=0.7) +
scale_size_continuous(name="Cases", trans="log", range=c(1,7),
breaks=mybreaks, labels = c("1-99",
"100-999", "1,000-9,999", "10,000-99,999", "50,000+")) +
scale_color_viridis_c(option="magma",name="Cases",
trans="log", breaks=mybreaks, labels = c("1-99",
"100-999", "1,000-9,999", "10,000-99,999", "50,000+")) +
# Cleaning up the graph
theme_void() +
guides( colour = guide_legend()) +
labs(title = "Anisa Dhana's lagout for COVID-19 Confirmed Cases in the US'") +
theme(
legend.position = "bottom",
text = element_text(color = "#22211d"),
plot.background = element_rect(fill = "#ffffff", color = NA),
panel.background = element_rect(fill = "#ffffff", color = NA),
legend.background = element_rect(fill = "#ffffff", color = NA)
) +
coord_fixed(ratio=1.5)
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 40 rows containing missing values (geom_point).

Mapping Data to Shapes
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>%
rename(Long = "Long_") %>%
filter(Country_Region == "US") %>%
group_by(Province_State) %>%
summarize(Confirmed = sum(Confirmed)) %>%
mutate(Province_State = tolower(Province_State))
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_character(),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# load the US map data
us <- map_data("state")
# Join the US map data with daily report
state_join <- left_join(us, daily_report, by = c("region" = "Province_State"))
view(state_join)
Wes Anderson Color Palattes
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) +
coord_fixed(1.3) +
geom_polygon(data = state_join, aes(fill = Confirmed), color = "black") +
scale_fill_gradientn(colours =
wes_palette("GrandBudapest1", 100, type = "continuous"),
trans = "log10") +
labs(title = "Confirmed COVID-19 Cases in the Continential US'")

RColorBrewer Color Palattes and Number of Confirmed Cases per US County
library(RColorBrewer)
report_03_27_2020 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>%
rename(Long = "Long_") %>%
unite(Key, Admin2, Province_State, sep = ".") %>%
group_by(Key) %>%
summarize(Confirmed = sum(Confirmed)) %>%
mutate(Key = tolower(Key))
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_character(),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# formatting the map data
us <- map_data("state")
counties <- map_data("county") %>%
unite(Key, subregion, region, sep = ".", remove = FALSE)
# joining tibbles
state_join <- left_join(counties, report_03_27_2020, by = c("Key"))
# mapping
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) +
coord_fixed(1.3) +
borders("state", colour = "black") +
geom_polygon(data = state_join, aes(fill = Confirmed)) +
scale_fill_gradientn(colors = brewer.pal(n = 5, name = "OrRd"),
breaks = c(1, 10, 100, 1000, 10000, 100000),
trans = "log10", na.value = "Light Yellow") +
ggtitle("Number of Confirmed Cases by US County") +
theme_bw()
## Warning: Transformation introduced infinite values in discrete y-axis

Massachusetts Confirmed COVID-19 Cases Mapping
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>%
rename(Long = "Long_") %>%
filter(Province_State == "Massachusetts") %>%
group_by(Admin2) %>%
summarize(Confirmed = sum(Confirmed)) %>%
mutate(Admin2 = tolower(Admin2))
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_character(),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
us <- map_data("state")
ma_us <- subset(us, region == "massachusetts")
counties <- map_data("county")
ma_county <- subset(counties, region == "massachusetts")
state_join <- left_join(ma_county, daily_report, by = c("subregion" = "Admin2"))
# plotting state map
ggplot(data = ma_county, mapping = aes(x = long, y = lat, group = group)) +
coord_fixed(1.3) +
geom_polygon(data = state_join, aes(fill = Confirmed), color = "white") +
scale_fill_gradientn(colors = brewer.pal(n = 5, name = "BrBG"),
trans = "log10") +
labs(title = "Confirmed COVID-19 Cases in Massachusetts'")

Plotly Confirmed Cases in MA
ggplotly(
ggplot(data = ma_county, mapping = aes(x = long, y = lat, group = group)) +
coord_fixed(1.3) +
geom_polygon(data = state_join, aes(fill = Confirmed), color = "grey90") +
scale_fill_gradientn(colours =
wes_palette("Chevalier1", 100, type = "continuous")) +
ggtitle("Confirmed COVID-19 Cases in MA") +
labs(x=NULL, y=NULL) +
theme(panel.border = element_blank()) +
theme(panel.background = element_blank()) +
theme(axis.ticks = element_blank()) +
theme(axis.text = element_blank()))
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
EXERCISE 1
# summarize the counts for each country for 9/26/2020
daily_report_9_26 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv")) %>%
rename(Long = "Long_") %>%
select(Country_Region, Long, Lat, Province_State, Confirmed) %>%
group_by(Country_Region, Long, Lat, Confirmed) %>%
summarize(Confirmed = sum(Confirmed))
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` regrouping output by 'Country_Region', 'Long', 'Lat' (override with `.groups` argument)
ggplot(daily_report_9_26, aes(x = Long, y = Lat, size = Confirmed/1700)) +
borders("world", colour = NA, fill = "grey80") +
theme_bw() +
geom_point(shape = 23, color='red', fill='red', alpha = 0.4) +
labs(title = 'Global Confirmed COVID-19 Cases', x = '', y = '',
size ="Cases (x1700))") +
theme(legend.position = "right") +
coord_fixed(ratio=1.5)
## Warning: Removed 46 rows containing missing values (geom_point).

EXERCISE 2
# Update Anisa Dhana's graph layout of the US to 9/26/2020
daily_report_us_9_26 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports_us/09-26-2020.csv")) %>%
rename(Long = "Long_") %>%
filter(Country_Region == "US") %>%
filter (!Province_State %in% c("Alaska","Hawaii", "American Samoa",
"Puerto Rico","Northern Mariana Islands",
"Virgin Islands", "Recovered", "Guam", "Grand Princess",
"District of Columbia", "Diamond Princess"))
## Parsed with column specification:
## cols(
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## FIPS = col_double(),
## Incident_Rate = col_double(),
## People_Tested = col_double(),
## People_Hospitalized = col_logical(),
## Mortality_Rate = col_double(),
## UID = col_double(),
## ISO3 = col_character(),
## Testing_Rate = col_double(),
## Hospitalization_Rate = col_logical()
## )
mybreaks <- c(1, 100, 1000, 10000, 10000)
ggplot(daily_report_us_9_26, aes(x = Long, y = Lat, size = Confirmed)) +
borders("state", colour = "white", fill = "grey90") +
geom_point(aes(x = Long, y = Lat, size = Confirmed, color = Confirmed), stroke = F, alpha = 0.7) +
scale_size_continuous(name = "Cases", trans = "log", range = c(1,7),
breaks = mybreaks, labels = c("1-99",
"100-999", "1,000-9,999", "10,000-49,999", "50,000+")) +
scale_color_viridis_c(option = "magma",name = "Cases",
trans = "log", breaks = mybreaks, labels = c("1-99",
"100-999", "1,000-9,999", "10,000-49,999", "50,000+")) +
theme_void() +
guides( colour = guide_legend()) +
labs(title = "Edited Anisa Dhana's lagout for COVID-19 Confirmed Cases in the US'") +
theme(
legend.position = "bottom",
text = element_text(color = "#22211d"),
plot.background = element_rect(fill = "#ffffff", color = NA),
panel.background = element_rect(fill = "#ffffff", color = NA),
legend.background = element_rect(fill = "#ffffff", color = NA)
) +
coord_fixed(ratio=1.5)

# Update Anisa Dhana's graph layout of the US to 9/26/2020 --> filtered from global data
daily_report_filtered_us_9_26 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv")) %>%
rename(Long = "Long_") %>%
filter(Country_Region == "US") %>%
filter (!Province_State %in% c("Alaska","Hawaii", "American Samoa",
"Puerto Rico","Northern Mariana Islands",
"Virgin Islands", "Recovered", "Guam", "Grand Princess",
"District of Columbia", "Diamond Princess"))
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
mybreaks <- c(1, 100, 1000, 10000, 10000)
ggplot(daily_report_filtered_us_9_26, aes(x = Long, y = Lat, size = Confirmed)) +
borders("state", colour = "white", fill = "grey90") +
geom_point(aes(x = Long, y = Lat, size = Confirmed, color = Confirmed), stroke = F, alpha = 0.7) +
scale_size_continuous(name = "Cases", range = c(1,16),
breaks = mybreaks, labels = c("1-99",
"100-999", "1,000-9,999", "10,000-49,999", "50,000+")) +
scale_color_viridis_c(option = "magma", name = "Cases",
trans = "log", breaks = mybreaks, labels = c("1-99",
"100-999", "1,000-9,999", "10,000-49,999", "50,000+")) +
theme_void() +
guides( colour = guide_legend()) +
labs(title = "Edited Anisa Dhana's lagout for COVID-19 Confirmed Cases in the US'") +
theme(
legend.position = "bottom",
text = element_text(color = "#22211d"),
plot.background = element_rect(fill = "#ffffff", color = NA),
panel.background = element_rect(fill = "#ffffff", color = NA),
legend.background = element_rect(fill = "#ffffff", color = NA)
) +
coord_fixed(ratio=1.5)
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Removed 59 rows containing missing values (geom_point).

EXERCISE 3
# Updated Number of Confirmed Cases by US County for 9/26/2020
report_09_26_2020 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv")) %>%
rename(Long = "Long_") %>%
unite(Key, Admin2, Province_State, sep = ".") %>%
group_by(Key) %>%
summarize(Confirmed = sum(Confirmed)) %>%
mutate(Key = tolower(Key))
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# formatting the map data
us <- map_data("state")
counties <- map_data("county") %>%
unite(Key, subregion, region, sep = ".", remove = FALSE)
# joining tibbles
state_join <- left_join(counties, report_09_26_2020, by = c("Key"))
# mapping
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) +
coord_fixed(1.3) +
borders("state", colour = "black") +
geom_polygon(data = state_join, aes(fill = Confirmed)) +
scale_fill_gradientn(colors = brewer.pal(n = 5, name = "RdYlGn"),
breaks = c(1, 10, 100, 1000, 10000, 100000),
trans = "log10", na.value = "Grey") +
ggtitle("Number of Confirmed Cases by US County") +
theme_minimal()
## Warning: Transformation introduced infinite values in discrete y-axis

EXERCISE 4
# Make an interactive plot of a new state --> Alabama
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv")) %>%
rename(Long = "Long_") %>%
filter(Province_State == "Alabama") %>%
group_by(Admin2) %>%
summarize(Confirmed = sum(Confirmed)) %>%
mutate(Admin2 = tolower(Admin2))
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
us <- map_data("state")
al_us <- subset(us, region == "alabama")
counties <- map_data("county")
al_county <- subset(counties, region == "alabama")
state_join <- left_join(al_county, daily_report, by = c("subregion" = "Admin2"))
# plot state map
ggplot(data = al_county, mapping = aes(x = long, y = lat, group = group)) +
coord_fixed(1.3) +
geom_polygon(data = state_join, aes(fill = Confirmed), color = "white") +
scale_fill_gradientn(colors = brewer.pal(n = 5, name = "BuPu"),
trans = "log10") +
labs(title = "COVID-19 Confirmed Cases in Alabama'")

ggplotly(
ggplot(data = al_county, mapping = aes(x = long, y = lat, group = group)) +
coord_fixed(1.3) +
geom_polygon(data = state_join, aes(fill = Confirmed), color = "black") +
scale_fill_gradientn(colours =
wes_palette("Moonrise1", 100, type = "continuous")) +
ggtitle("COVID-19 Cases in AL") +
labs(x=NULL, y=NULL) +
theme(panel.border = element_blank()) +
theme(panel.background = element_blank()) +
theme(axis.ticks = element_blank()) +
theme(axis.text = element_blank())
)
EXERCISE 5
# Concise Report